# 3장 데이터 시각화 함수

# 3.1.2 벡터라이제이션 : 벡터의 연산 과정인 입력, 연산, 출력에서 벡터를 원소 하나씩 반복적으로 처리하지 않고 통째로 처리하는 기법 

# 3.2 graphics 패키지

# 3.2.1 barplot() 함수

## https://www.statmethods.net/graphs/bar.html

# 막대그림
counts <- table(mtcars$gear)
barplot(counts, main="Car Distribution", xlab="Number of Gears") 

# 수평 막대그림
counts <- table(mtcars$gear)
barplot(counts, main="Car Distribution", horiz=TRUE,
        names.arg=c("3 Gears", "4 Gears", "5 Gears"))

# 벡터가 아닌 행렬자료는 비중까지 그려짐
counts <- table(mtcars$vs, mtcars$gear)
barplot(counts, main="Car Distribution by Gears and VS",
        xlab="Number of Gears", col=c("darkblue","red"),
        legend = rownames(counts)) 

# beside 옵션을 주면 비중이 아닌 집단 막대그림
counts <- table(mtcars$vs, mtcars$gear)
barplot(counts, main="Car Distribution by Gears and VS",
        xlab="Number of Gears", col=c("darkblue","red"),
        legend = rownames(counts), beside=TRUE)

counts <- table(mtcars$gear)
barplot(counts, main="Car Distribution", horiz=TRUE, names.arg=c("3 Gears", "4 Gears", "5   Gears"), cex.names=0.8)

par(las=2) # make label text perpendicular to axis
barplot(counts, main="Car Distribution", horiz=TRUE, names.arg=c("3 Gears", "4 Gears", "5   Gears"), cex.names=0.8)

par(las=1)

# 막대그림의 옵셥들
set.seed(1)
bar.x <- round(runif(12) * 50)
bar.x
##  [1] 13 19 29 45 10 45 47 33 31  3 10  9
barplot(bar.x) # 막대그림은 빈도 자료의 벡터를 사용하여 그림
title(main = "Vector Barplot")

barplot(-bar.x) # 음수로 줄 경우 상하가 반전
title(main = "Vector Barplot(Negative Value)")

set.seed(2);bar.y <- matrix(bar.x, ncol = 3, byrow = T)
bar.y
##      [,1] [,2] [,3]
## [1,]   13   19   29
## [2,]   45   10   45
## [3,]   47   33   31
## [4,]    3   10    9
barplot(bar.y, main = "Matrix Barplot")

barplot(bar.y, beside = TRUE, main = "Matrix Barplot by beside = TRUE")

bar.width <- rep(1:3, 4)
bar.width
##  [1] 1 2 3 1 2 3 1 2 3 1 2 3
barplot(bar.x, width = bar.width) #막대의 폭 변경
title(main = "Vector Barplot by width 1:3")

barplot(bar.x, space = 2) # 막대 그림 사이의 공간
title(main = "Vector Barplot by space = 2")

barplot(bar.y, beside = TRUE, space = c(0.5, 2)) # 그룹별 공간을 준 그림 
title(main="Vector Barplot by space = c(0.5, 2)")

rownames(bar.y) <- paste("row", 1:4)
colnames(bar.y) <- paste("col", 1:3)
bar.y #행렬에 이름이 있으면...
##       col 1 col 2 col 3
## row 1    13    19    29
## row 2    45    10    45
## row 3    47    33    31
## row 4     3    10     9
barplot(bar.y)
title(main = "Matrix Barplot using default names.arg")

barplot(bar.x, names.arg = letters[1:length(bar.x)])
title(main = "Vector Barplot using names.arg")

barplot(bar.x, legend.text = letters[1:length(bar.x)])
title(main = "Vector Barplot using legend.text")

barplot(bar.y, legend.text = T)
title(main = "Matrix Barplot using legend.text = T")

barplot(bar.x, horiz = T, density = 5)
title(main = "Vector Barplot by horiz = T, density = 5")

barplot(bar.x, density = 15, angle = 135)
title(main = "Vector Barplot by density = 15, angle = 135")

barplot(bar.x, col = rainbow(length(bar.x)))
title(main = "Vector Barplot by rainbow color")

barplot(bar.y, border = "red",
          col = c("lightblue", "mistyrose","lightcyan", "lavender"))
title(main = "Matrix Barplot by col, border")

barplot(bar.x, axes = FALSE)
title(main = "Vector Barplot by axes = FALSE")

barplot(bar.y, cex.axis = 1.8, ylim = c(0, 90))
title(main = "Matrix Barplot by cex.axis,ylim")

barplot(bar.y, axisnames = T, cex.names = 1.8, axis.lty = 2)
title(main = "Matrix Barplot by cex.names, axis.lty")

# 3.2.2 boxplot() 함수
# https://www.statmethods.net/graphs/boxplot.html

# Boxplot of MPG by Car Cylinders 
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
boxplot(mpg~cyl,data=mtcars, main="Car Milage Data", 
        xlab="Number of Cylinders", ylab="Miles Per Gallon") 

# Notched Boxplot of Tooth Growth Against 2 Crossed Factors
# boxes colored for ease of interpretation 
str(ToothGrowth)
## 'data.frame':    60 obs. of  3 variables:
##  $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
##  $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dose: num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
head(ToothGrowth)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
boxplot(len~supp*dose, data=ToothGrowth, notch=TRUE, 
        col=(c("gold","darkgreen")),
        main="Tooth Growth", xlab="Suppliment and Dose") 
## Warning in bxp(list(stats = structure(c(8.2, 9.7, 12.25, 16.5, 21.5, 4.2, :
## some notches went outside hinges ('box'): maybe set notch=FALSE
# Violin Plots
if(!require(vioplot)) install.packages("vioplot"); library(vioplot)
## Loading required package: vioplot
## Loading required package: sm
## Package 'sm', version 2.2-5.6: type help(sm) for summary information
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

x1 <- mtcars$mpg[mtcars$cyl==4]
x2 <- mtcars$mpg[mtcars$cyl==6]
x3 <- mtcars$mpg[mtcars$cyl==8]
vioplot(x1, x2, x3, names=c("4 cyl", "6 cyl", "8 cyl"), col="gold")
title("Violin Plots of Miles Per Gallon")

# Bagplot - A 2D Boxplot Extension 
# Violin Plots
# Example of a Bagplot
if(!require(aplpack)) install.packages("aplpack"); library(aplpack)
## Loading required package: aplpack
## Loading required package: tcltk
bagplot(mtcars$wt,mtcars$mpg, 
        xlab="Car Weight", ylab="Miles Per Gallon",
        main="Bagplot Example") 

# 상자그림의 옵션
set.seed(1)
norm1 <- round(rnorm(100, 3, 2), digits = 2)
set.seed(2)
norm2 <- round(rnorm(100, 3, 3), digits = 2)

boxplot(norm1, main="boxplot of one vector")

boxplot(norm1, norm2, main="boxplot of two vectors")

# 리스트로 저장된 자료도 상자 그림으로 그려짐
list1 = list(data1 = norm1, data2 = norm2, data3 = rnorm(100, 7, 4))
list1
## $data1
##   [1]  1.75  3.37  1.33  6.19  3.66  1.36  3.97  4.48  4.15  2.39  6.02
##  [12]  3.78  1.76 -1.43  5.25  2.91  2.97  4.89  4.64  4.19  4.84  4.56
##  [23]  3.15 -0.98  4.24  2.89  2.69  0.06  2.04  3.84  5.72  2.79  3.78
##  [34]  2.89  0.25  2.17  2.21  2.88  5.20  4.53  2.67  2.49  4.39  4.11
##  [45]  1.62  1.59  3.73  4.54  2.78  4.76  3.80  1.78  3.68  0.74  5.87
##  [56]  6.96  2.27  0.91  4.14  2.73  7.80  2.92  4.38  3.06  1.51  3.38
##  [67] -0.61  5.93  3.31  7.35  3.95  1.58  4.22  1.13  0.49  3.58  2.11
##  [78]  3.00  3.15  1.82  1.86  2.73  5.36 -0.05  4.19  3.67  5.13  2.39
##  [89]  3.74  3.53  1.91  5.42  5.32  4.40  6.17  4.12  0.45  1.85  0.55
## [100]  2.05
## 
## $data2
##   [1]  0.31  3.55  7.76 -0.39  2.76  3.40  5.12  2.28  8.95  2.58  4.25
##  [12]  5.95  1.82 -0.12  8.35 -3.93  5.64  3.11  6.04  4.30  9.27 -0.60
##  [23]  7.77  8.86  3.01 -4.36  4.43  1.21  5.38  3.87  5.22  3.96  6.23
##  [34]  2.15  0.67  1.21 -2.18  0.29  1.32  2.26  1.85 -2.88  0.47  8.71
##  [45]  4.87  8.97  2.08  2.73  2.45 -0.60  0.49  9.20  1.31  6.83 -0.14
##  [56] -2.90  2.03  5.81  6.42  8.01 -2.36  9.09  0.89  3.47  4.52  0.54
##  [67] -3.00  1.56  3.25  0.31  0.24  3.99  2.58  4.30  2.84  0.28  6.91
##  [78]  5.32  6.16 -1.23  5.99 -2.09  1.40 -1.12 -3.62  8.47  1.04  2.15
##  [89]  1.84  4.16  7.80  8.04 -0.55 -1.08 -1.54 -0.76  8.88  3.02  0.47
## [100]  1.20
## 
## $data3
##   [1] 11.29783763  8.04239134  5.74291208  4.00147962  3.55120668
##   [6] 15.19216121 10.75968031 15.03474846  5.31450571  5.59666231
##  [11]  2.89047761  5.99792349  8.88743786 12.43575928  9.25667441
##  [16]  8.82392036 11.92381465 11.58854739  7.42639216  3.86673337
##  [21] 11.96479931  7.55543368 13.84252635  5.27743610  2.82308168
##  [26]  9.15031810  4.32165605  9.55522245  0.10404066  0.03027968
##  [31]  9.75921669  8.32385271 10.48427084 -1.06498233 11.85031641
##  [36] 11.80197880 11.12827330 10.14564102 15.44029406  1.18476061
##  [41]  4.66758461  8.63889593  3.77207346  7.34220176  9.98497267
##  [46]  4.38530775  9.62842393  9.19963694  3.77308257  3.01048113
##  [51] 10.90356255  6.32230728  9.88876712  3.62232557 12.10917474
##  [56]  1.62755780 10.06136268  8.85681028  8.07197311  9.67009075
##  [61]  8.59386914  4.44771588  5.92914838  8.43951826  1.74853562
##  [66]  3.46412156 15.30837918 -1.39690253  2.04597614 10.96173236
##  [71] 11.35464745 10.35940902  7.22743455  8.29551220  3.38132533
##  [76]  4.39126461  5.95018145  3.26134864 10.28464485  0.50296331
##  [81]  2.87838532  1.95228275  8.56873850  2.47424695  9.17657794
##  [86] 11.70643574  7.10091428  9.06053268  4.38356096  9.01456796
##  [91]  1.91152311  6.69291539  1.61872250  5.93472976 11.35025198
##  [96]  9.80227118  5.22896194  3.84592014  3.57289716  4.01432394
boxplot(list1)
title("boxplot of simple list")

InsectSprays
##    count spray
## 1     10     A
## 2      7     A
## 3     20     A
## 4     14     A
## 5     14     A
## 6     12     A
## 7     10     A
## 8     23     A
## 9     17     A
## 10    20     A
## 11    14     A
## 12    13     A
## 13    11     B
## 14    17     B
## 15    21     B
## 16    11     B
## 17    16     B
## 18    14     B
## 19    17     B
## 20    17     B
## 21    19     B
## 22    21     B
## 23     7     B
## 24    13     B
## 25     0     C
## 26     1     C
## 27     7     C
## 28     2     C
## 29     3     C
## 30     1     C
## 31     2     C
## 32     1     C
## 33     3     C
## 34     0     C
## 35     1     C
## 36     4     C
## 37     3     D
## 38     5     D
## 39    12     D
## 40     6     D
## 41     4     D
## 42     3     D
## 43     5     D
## 44     5     D
## 45     5     D
## 46     5     D
## 47     2     D
## 48     4     D
## 49     3     E
## 50     5     E
## 51     3     E
## 52     5     E
## 53     3     E
## 54     6     E
## 55     1     E
## 56     1     E
## 57     3     E
## 58     2     E
## 59     6     E
## 60     4     E
## 61    11     F
## 62     9     F
## 63    15     F
## 64    22     F
## 65    15     F
## 66    16     F
## 67    13     F
## 68    10     F
## 69    26     F
## 70    26     F
## 71    24     F
## 72    13     F
dim(InsectSprays)
## [1] 72  2
unique(InsectSprays$spray)
## [1] A B C D E F
## Levels: A B C D E F
boxplot(count ~ spray, data = InsectSprays, col = "lightgray")
title("boxplot of dataframe by formula")

head(ToothGrowth)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
boxplot(len ~ dose, data = ToothGrowth, main="len ~ dose")

boxplot(len ~ supp, data = ToothGrowth, main="len ~ supp")

boxplot(len ~ dose + supp, data = ToothGrowth, main="len ~ dose + supp")

boxplot(len ~ supp == "VC", data = ToothGrowth) #원하는 자료만 선택
title("len ~ supp == \"VC\"")

boxplot(len ~ dose, data = ToothGrowth, subset = supp == "VC") # subset 옵션 사용 가능
title("len ~ dose, subset = supp == \"VC\"")

boxplot(len[supp == "VC"] ~ dose[supp == "VC"], data = ToothGrowth)
title("len[supp == \"VC\"] ~ dose[supp == \"VC\"]")

set.seed(3)
z <- round(rnorm(50) * 10)
summary(z)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -23.00   -7.00   -1.50   -0.66    7.00   17.00
z[50] <- 40       # 50번째 데이터를 40으로 치환하여 이상치를 만듦
summary(z)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -23.00   -7.00   -0.50    0.32    7.75   40.00
boxplot(z)
title(main="range = default(1.5)")

boxplot(z, range = 0) # 이상치 없이 최대 최소 모두 그리기
title(main="range = 0")

boxplot(z, range = 1.0) # IQR 범위까지 상자 그리기기
title(main="range = 1.0")

boxplot(z, range = 2.0) # IQR * 2 범위까지 상자 그리기
title(main="range = 2.0")

x1 <- runif(20)
x2 <- runif(20)
x3 <- runif(20)
x4 <- runif(20)
x5 <- runif(20)
x <- list(x1, x2, x3, x4, x5)

y1 <- runif(10)
y2 <- runif(40)
y3 <- runif(90)
y4 <- runif(160)
y <- list(y1, y2, y3, y4)

boxplot(x)
title(main = "default")

boxplot(x, width = 1:5)
title(main = "width = 1:5")

str(y)
## List of 4
##  $ : num [1:10] 0.731 0.86 0.343 0.493 0.854 ...
##  $ : num [1:40] 0.7285 0.4766 0.5819 0.2071 0.0289 ...
##  $ : num [1:90] 0.223 0.255 0.65 0.617 0.253 ...
##  $ : num [1:160] 0.593 0.282 0.338 0.498 0.594 ...
boxplot(y, varwidth = T)
title(main = "varwidth = T")

boxplot(y, varwidth = T, width = 4:1)
title(main = "varwidth = T & width = 4:1")

boxplot(y)
title(main = "notch = default(FALSE)")

boxplot(y, notch = T, main = "notch = TRUE")
## Warning in bxp(list(stats = structure(c(0.0328013296239078,
## 0.38725300040096, : some notches went outside hinges ('box'): maybe set
## notch=FALSE

boxplot(z, main = "outline = default(TRUE)")

boxplot(z, outline = F, main = "outline = FALSE")

# names 인수를 사용할 경우
xname <- c("x1", "x2", "x3", "x4", "x5")
boxplot(x, names = xname)
title(main = "using names argument")

# names attributes를 이용할 경우
names(x) <- c("x1", "x2", "x3", "x4", "x5")
boxplot(x)
title(main = "using names attributes")

boxplot(x, boxwex = 0.3)
title(main = "boxwex = 1")

boxplot(x, staplewex = 2)
title(main = "staplewex = 2")

boxplot(x, plot = FALSE)
## $stats
##            [,1]       [,2]       [,3]      [,4]       [,5]
## [1,] 0.04117032 0.09869789 0.03852075 0.1087810 0.09831946
## [2,] 0.18077793 0.31802044 0.23061703 0.4298766 0.26834736
## [3,] 0.37115586 0.51577862 0.46259269 0.5765672 0.34477785
## [4,] 0.69219256 0.67721069 0.72621593 0.7847074 0.63193596
## [5,] 0.91342360 0.87987004 0.88821092 0.9586770 0.90653142
## 
## $n
## [1] 20 20 20 20 20
## 
## $conf
##           [,1]      [,2]      [,3]      [,4]      [,5]
## [1,] 0.1904737 0.3888772 0.2874982 0.4512060 0.2163224
## [2,] 0.5518380 0.6426801 0.6376872 0.7019285 0.4732332
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "x1" "x2" "x3" "x4" "x5"
boxplot(z, plot = FALSE)
## $stats
##       [,1]
## [1,] -23.0
## [2,]  -7.0
## [3,]  -0.5
## [4,]   8.0
## [5,]  17.0
## 
## $n
## [1] 50
## 
## $conf
##           [,1]
## [1,] -3.851686
## [2,]  2.851686
## 
## $out
## [1] 40
## 
## $group
## [1] 1
## 
## $names
## [1] "1"
boxplot(x, border = "magenta", col = c("lightblue", "mistyrose", "lightcyan", "lavender"))
title(main = "use border, col")

boxplot(x, horizontal = TRUE)
title(main = "horizontal = TRUE")

# at을 통해 원하는 위치에 상자그림을 그릴 수 있음
boxplot(len ~ dose, data = ToothGrowth, boxwex = 0.25, at = 1:3 - 0.2,
          subset = supp == "VC", col = "yellow", main = "Guinea Pigs' Tooth Growth",
          xlab = "Vitamin C dose mg", ylab = "tooth length", ylim = c(0, 35))

boxplot(len ~ dose, data = ToothGrowth, add = TRUE, boxwex = 0.25, at = 1:3 + 0.2,
          subset = supp == "OJ", col = "orange")

legend(2, 9, c("Ascorbic acid", "Orange juice"), fill = c("yellow", "orange"))

# 참고로 알아두면 좋은 옵션들
boxplot(x, log = "y", main = "log = \"y\"")

boxplot(x, log = "x", main = "log = \"x\"")

boxplot(x)
boxplot(y, add = TRUE)
title(main = "add = TRUE(y is added to x)")

boxplot(y, staplelty = 3)
title(main = "staplelty = 3")

boxplot(z, outpch = 2)
title(main = "outpch = 2")

# 3.2.3 dotchart() 함수
# https://www.statmethods.net/graphs/dot.html

# Simple Dotplot
dotchart(mtcars$mpg,labels=row.names(mtcars),cex=.7,
         main="Gas Milage for Car Models", 
         xlab="Miles Per Gallon")

# Dotplot: Grouped Sorted and Colored
# Sort by mpg, group and color by cylinder 
x <- mtcars[order(mtcars$mpg),] # sort by mpg
x$cyl <- factor(x$cyl) # it must be a factor
x$color[x$cyl==4] <- "red"
x$color[x$cyl==6] <- "blue"
x$color[x$cyl==8] <- "darkgreen" 
dotchart(x$mpg,labels=row.names(x),cex=.7,groups= x$cyl,
         main="Gas Milage for Car Models\ngrouped by cylinder",
         xlab="Miles Per Gallon", gcolor="black", color=x$color) 

# dotchart의 다양한 옵션
month <- matrix(1:12, ncol = 3)
rownames(month) <- paste("Row", 1:4)
colnames(month) <- paste("Col", 1:3)
month
##       Col 1 Col 2 Col 3
## Row 1     1     5     9
## Row 2     2     6    10
## Row 3     3     7    11
## Row 4     4     8    12
# (1) 벡터
dotchart(as.vector(month), label = month.abb)
title(main = "x is a vector")

# (2) 행렬
dotchart(month)
title(main = "x is a matrix")

# (3) group
quarter.name <- c("1QT", "2QT", "3QT", "4QT")
quarter <- factor(row(month), label = quarter.name)
quarter
##  [1] 1QT 2QT 3QT 4QT 1QT 2QT 3QT 4QT 1QT 2QT 3QT 4QT
## Levels: 1QT 2QT 3QT 4QT
dotchart(month, groups = quarter)
title(main = "groups = quarter")

# (4) groups, labels
name <- c("1st", "2nd", "3rd")
month
##       Col 1 Col 2 Col 3
## Row 1     1     5     9
## Row 2     2     6    10
## Row 3     3     7    11
## Row 4     4     8    12
dotchart(month, groups = quarter, labels = name)
title(main = "groups = quarter, labels = name")

dotchart(month, group = quarter, labels = month.abb)
title(main = "group=quarter, labels=month.abb")

gmean <- tapply(month, quarter, mean)
gmean
## 1QT 2QT 3QT 4QT 
##   5   6   7   8
dotchart(month, group = quarter, labels = month.abb, gdata = gmean)
title(main = "group=quarter, labels=month.abbngdata=gmean")

dotchart(month, labels = month.abb, main = "default cex")

dotchart(month, labels = month.abb, cex = 1.1, main = "cex = 1.1")

dotchart(month, labels = month.abb, pch = 2, main = "pch = 2")

dotchart(month, labels = month.abb, groups = quarter, pch = 2, gpch = 5, gdata = gmean)
title(main = "pch = 2, gpch = 5")

dotchart(month, cex = 1.1, bg = "red", main = "bg = \"red\"")

dotchart(month, cex = 1.1, bg = "red", color = "blue")
title(main = "bg = \"red\", color = \"blue\"")

dotchart(month, cex = 1.1, color = "red", gcolor = "blue", groups = quarter,
           gdata = gmean)

title(main = "color = \"red\", gcolor = \"blue\"")

dotchart(month, cex = 1.1, lcolor = "red", gcolor = "blue", groups = quarter,
           gdata = gmean)
title(main = "lcolor = \"red\", gcolor = \"blue\"")

VADeaths
##       Rural Male Rural Female Urban Male Urban Female
## 50-54       11.7          8.7       15.4          8.4
## 55-59       18.1         11.7       24.3         13.6
## 60-64       26.9         20.3       37.0         19.3
## 65-69       41.0         30.9       54.6         35.1
## 70-74       66.0         54.3       71.1         50.0
dotchart(VADeaths)
title(main = "Death Rates in Virginian(Population group)")

dotchart(t(VADeaths), xlim = c(0, 100))
title(main = "Death Rates in Virginian(Age group)")

# 3.2.4 hist() 함수
# https://www.statmethods.net/graphs/density.html

# Simple Histogram
hist(mtcars$mpg)

# Colored Histogram with Different Number of Bins
hist(mtcars$mpg, breaks=12, col="red") 

# Add a Normal Curve 
x <- mtcars$mpg 
h<-hist(x, breaks=10, col="red", xlab="Miles Per Gallon", 
        main="Histogram with Normal Curve") 
xfit<-seq(min(x),max(x),length=40) 
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x)) 
yfit <- yfit*diff(h$mids[1:2])*length(x) 
lines(xfit, yfit, col="blue", lwd=2) 

# Kernel Density Plot
d <- density(mtcars$mpg) # returns the density data 
plot(d) # plots the results 

# Filled Density Plot
d <- density(mtcars$mpg)
plot(d, main="Kernel Density of Miles Per Gallon")
polygon(d, col="red", border="blue") 

# Compare MPG distributions for cars with 
#4,6, or 8 cylinders
if(!require(sm)) install.packages("sm"); library(sm)
attach(mtcars)

# create value labels 
cyl.f <- factor(cyl, levels= c(4,6,8),
                labels = c("4 cylinder", "6 cylinder", "8 cylinder")) 

# plot densities 
sm.density.compare(mpg, cyl, xlab="Miles Per Gallon")
title(main="MPG Distribution by Car Cylinders")

# add legend via mouse click
colfill<-c(2:(2+length(levels(cyl.f)))) 
legend("topright", levels(cyl.f), fill=colfill) 

detach()


set.seed(7)
hist.data <- rnorm(100, 3, 2)
hist.data <- round(hist.data, digits = 2)
summary(hist.data)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -0.570   1.880   3.210   3.278   4.442   8.430
# # Sturges 공식으로 구해진 계급의 수
# class.n <- ceiling(log(length(hist.data), base = 2) +1 )
# class.n
# 
# # pretty 함수로 구한 breaks
# hist.breaks <- pretty(hist.data, class.n)
# hist.breaks
# 
# hist(hist.data, main = "breaks = default")
# hist(hist.data, breaks = class.n, main = "nclass = class.n")
# hist(hist.data, breaks = hist.breaks, main = "breaks = hist.breaks")
# hist(hist.data, nclass = hist.breaks, main = "nclass = hist.breaks")
# 
# # 도수분포표 계산
# freq <- integer(length(hist.breaks) - 1)
# for (i in seq(freq)) {
#       freq[i] <- sum(hist.breaks[i] < hist.data & hist.data <= hist.breaks[i + 1])
#   }
# freq
# 
# nclass.Sturges(hist.data)
# nclass.scott(hist.data)
# nclass.FD(hist.data)
# pretty(hist.data, nclass.Sturges(hist.data))
# pretty(hist.data, nclass.scott(hist.data))
# pretty(hist.data, nclass.FD(hist.data))
# pretty(hist.data, 10)
# 
# hist(hist.data, breaks = "Sturges", main = "breaks = \"Sturges\"")
# hist(hist.data, breaks = "Scott", main = "breaks = \"Scott\"")
# hist(hist.data, breaks = nclass.FD, main = "breaks = nclass.FD")
# hist(hist.data, breaks = 10, main = "breaks = 10")
# 
# 
# hist.interval <- cut(hist.data, breaks = hist.breaks)
# hist.interval
# table(hist.interval)

hist(hist.data, labels = T, main = "freq = default")

hist(hist.data, freq = F, labels = T, main = "freq = FALSE, labels = T")

hist(hist.data, probability = TRUE, main = "probability = TRUE")

hist(hist.data, labels = LETTERS[1:10], main = "labels = LETTERS[1:10]")

hist(hist.data, right = FALSE, main = "right = FALSE")

hist(hist.data, xlim = c(1, 5), main = "xlim = c(1, 5)")

hist(hist.data, density = 20, col = "red", angle = 135, border = "blue")

hist(hist.data, axes = FALSE, main = "axes = FALSE")

hist(hist.data, plot = FALSE)
## $breaks
##  [1] -1  0  1  2  3  4  5  6  7  8  9
## 
## $counts
##  [1]  4  5 17 20 20 18  8  3  4  1
## 
## $density
##  [1] 0.04 0.05 0.17 0.20 0.20 0.18 0.08 0.03 0.04 0.01
## 
## $mids
##  [1] -0.5  0.5  1.5  2.5  3.5  4.5  5.5  6.5  7.5  8.5
## 
## $xname
## [1] "hist.data"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# 3.2.5 pie() 함수
# https://www.statmethods.net/graphs/pie.html

# Simple Pie Chart
slices <- c(10, 12,4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
pie(slices, labels = lbls, main="Pie Chart of Countries")

# Pie Chart with Percentages
slices <- c(10, 12, 4, 16, 8) 
lbls <- c("US", "UK", "Australia", "Germany", "France")
pct <- round(slices/sum(slices)*100)
lbls <- paste(lbls, pct)# add percents to labels 
lbls <- paste(lbls,"%",sep="") # ad % to labels 
pie(slices,labels = lbls, col=rainbow(length(lbls)),
    main="Pie Chart of Countries") 

# 3D Exploded Pie Chart
if(!require(plotrix)) install.packages("plotrix"); library(plotrix)
## Loading required package: plotrix
slices <- c(10, 12, 4, 16, 8) 
lbls <- c("US", "UK", "Australia", "Germany", "France")
pie3D(slices,labels=lbls,explode=0.1,
      main="Pie Chart of Countries ")

# Pie Chart from data frame with Appended Sample Sizes
mytable <- table(iris$Species)
lbls <- paste(names(mytable), "\n", mytable, sep="")
pie(mytable, labels = lbls, 
    main="Pie Chart of Species\n (with sample sizes)") 

# 원도표의 옵션
set.seed(5)
pie.data <- sample(7)
pie.data
## [1] 2 3 1 6 4 5 7
pie(pie.data, main = "default")

pie(pie.data, labels = LETTERS[1:7], main = "labels = LETTERS[1:7]")

pie(pie.data, edges = 10, main = "edges = 10")

pie(pie.data, edges = 20, main = "edges = 20")

pie(pie.data, main = "default radius")

pie(pie.data, radius = 0.5, main = "radius = 0.5")

pie(pie.data, radius = 1.5, main = "radius = 1.5")

pie(pie.data, radius = 0, main = "radius = 0")

# 3.2.6 stripchart() 함수 *

set.seed(1)
x <- round(rnorm(50), 1)
set.seed(2)
y <- round(rnorm(50), 1)
set.seed(3)
z <- round(rnorm(50), 1)
strip.data <- list(x, y, z)

stripchart(x, main = "a single vector")                   # 벡터

stripchart(strip.data, main = "a list having 3 vectors")  # 리스트

with(OrchardSprays, stripchart(decrease ~ treatment,      # formula
       main = "formula decrease ~ treatment ", xlab = "treatment", ylab = "decrease"))

set.seed(3)
x <- rnorm(50)
xr <- round(x, 1)

stripchart(x)
m <- mean(par("usr")[1:2])
text(m, 1.04, "stripchart(x, \"overplot\")")

stripchart(xr, method = "stack", add = TRUE, at = 1.2)
text(m, 1.35, "stripchart(round(x, 1), \"stack\")")

stripchart(xr, method = "jitter", add = TRUE, at = 0.7)
text(m, 0.85, "stripchart(round(x,1), \"jitter\")")

with(OrchardSprays, stripchart(decrease ~ treatment, method = "jitter",
       jitter = 0.2, col = "red", pch = 16, cex = 1.5, vertical = TRUE, log = "y",
       main="stripchart(Orchardsprays)", xlab = "treatment", ylab = "decrease",
       group.names = paste(LETTERS[1:8], "group")))

with(OrchardSprays, stripchart(decrease ~ treatment, method = "stack",
       offset = 1/2, col = 1:8, pch = 15, cex = 1.5, main = "stripchart(Orchardsprays)"))

# Create some linked plots 
if(!require(iplots)) install.packages("iplots"); library(iplots)
## Loading required package: iplots
## Loading required package: rJava
ihist(mtcars$mpg) # histogram 
## ID:1 Name: "Histogram (mtcars$mpg)"
ibar(mtcars$carb) # barchart
## ID:2 Name: "Barchart (mtcars$carb)"
iplot(mtcars$mpg, mtcars$wt) # scatter plot
## ID:3 Name: "Scatterplot (mtcars$wt vs mtcars$mpg)"
ibox(mtcars[c("qsec","disp","hp")]) # boxplots 
## ID:4 Name: "Boxplot (qsec)"
ipcp(mtcars[c("mpg","wt","hp")]) # parallel coordinates
## ID:5 Name: "Parallel coord. plot (default)"
# 참고로 알면 좋은 함수
# (1) Expression
curve(x ^ 3 - 3 * x, -2, 2)
title(main = "User defined expression")

myfun <- function(x) x ^ 2 + 2

# (2) User Function
curve(myfun, -pi, pi)
title(main = "User defined function")

# (3) R Function
curve(dnorm, from = -3, to = 3)
title(main = "Normal distribution density")

# (4) plot Function
plot(dnorm, from = -3, to = 3)
title(main = "curve by plot function")

curve(dnorm, from = -3, to = 3, n = 10)
title(main = "dnorm by n = 10")

curve(dnorm, from = -1, to = 1, xlim = c(-3, 3))
title(main = "dnorm by from=-1, to=1, xlim=c(-3,3)")

curve(sin, from = -2 * pi, to = 2 * pi, lty = 1, col = "red")
curve(cos, from = -2 * pi, to = 2 * pi, lty = 2, col = "blue", add = T)
title(main = "add = TRUE")

curve(dnorm, from = -3, to = 3, log = "y")
title(main = "dnorm by log = \"y\"")

# 3.2.8 matplot(), matpoints(), matlines() 함수 *그룹별 그림을 그릴때

set.seed(10)
y1 <- rnorm(20, mean = -3, sd = 1)
set.seed(20)
y2 <- rnorm(20, mean = 0, sd = 1)
set.seed(30)
y3 <- rnorm(20, mean = 3, sd = 1)

mat <- cbind(y1, y2, y3)
mat
##              y1          y2       y3
##  [1,] -2.981254  1.16268529 1.711482
##  [2,] -3.184253 -0.58592447 2.652311
##  [3,] -4.371331  1.78546500 2.478371
##  [4,] -3.599168 -1.33259371 4.273473
##  [5,] -2.705455 -0.44656677 4.824521
##  [6,] -2.610206  0.56960612 1.488692
##  [7,] -4.208076 -2.88971761 3.110508
##  [8,] -3.363676 -0.86901834 2.239204
##  [9,] -4.626673 -0.46170268 2.330103
## [10,] -3.256478 -0.55554091 3.274520
## [11,] -1.898220 -0.02013537 1.976728
## [12,] -2.244218 -0.15038222 1.180602
## [13,] -3.238234 -0.62812676 2.332210
## [14,] -2.012555  1.32322085 2.940702
## [15,] -2.258610 -1.52135057 3.880166
## [16,] -2.910653 -0.43742787 3.268513
## [17,] -3.954944  0.97057758 2.980421
## [18,] -3.195150  0.02822264 2.475053
## [19,] -2.074479 -0.08578219 1.590669
## [20,] -2.517021  0.38921440 1.166011
matplot(y1, type = "l", main = "One vecter argument")

matplot(y1, y2, main = "Two vecter arguments")

matplot(mat, main = "Matrix argument")

matplot(mat, type = "n", main = "Add matlines, matpoints")
matlines(mat)
matpoints(mat)

matplot(mat, type = "lSo", main = "type = \"lSo\"")

matplot(mat, type = c("l","S","o"), main = "type = c(\"l\", \"S\", \"o\")")

matplot(mat, col = c("red","blue","green"), cex = c(1, 1.2, 1.4))
title(main = "c(\"red\", \"blue\", \"green\"), cex=c(1, 1.2, 1.4)")

matplot(mat, type = "l", lty = 3:5, lwd = 1:3)
title(main = "lty = 3:5, lwd = 1:3")

matplot(mat)
matplot(rnorm(20), type = "l", add = TRUE)
title(main = "matplot add matplot")

matplot(mat, type = "n")
matlines(rnorm(20), type = "p")
title(main = "matlines type = \"p\"")

matplot(mat, type = "n")
matpoints(rnorm(20), type = "l")

title(main = "matpoints type = \"l\"")

matplot(mat, pch = 1:3, col = 3:5, verbose = TRUE) # matplot 정보가 출력됨
## matplot: doing 3 plots with  col= ("3" "4" "5") pch= ("1" "2" "3") ...
title(main = "pch = 1:3")

# 3.2.9 qqnrom(), qqline(), qqplot() 함수 * 분위수 그림 (생략)
data1 <- round(rnorm(100, 3, 2), 2)
set.seed(2)
data2 <- round(rnorm(100, 3, 2), 2)

qqplot(data1, data2, main = "Q-Q 플롯")
abline(0,1)

plot(sort(data1), sort(data2), main = "plot of sorted data")
abline(0,1)

x <- round(rnorm(10), 2)
y <- round(rnorm(10), 2)
qqplot(x, y, plot.it = FALSE)
.Last.value
## $help_type
## NULL
qq <- qqplot(x, y, plot.it = FALSE)
qq
## $x
##  [1] -0.86 -0.75 -0.42 -0.35 -0.31  0.26  0.94  1.07  2.01  2.05
## 
## $y
##  [1] -1.03 -0.78 -0.25  0.11  0.46  0.47  0.56  1.15  1.23  1.36
set.seed(1)
data1 <- rnorm(100, 3, 2)
set.seed(2)
data3 <- round(rnorm(50), 2)
qqplot(data1, data3, main = "Q-Q 플롯 length(data1) != length(data3)")
abline(-3/2, 1/2)
abline(0, 1, lty = 2)

seq.odd <- seq(1, 99, 2)
seq.even <- seq(2, 100, 2)
data.odd <- sort(data1)[seq.odd]
data.even <- sort(data1)[seq.even]
data.mean <- (data.odd + data.even ) / 2
plot(data.mean, sort(data3), main = "plot of modified data")
abline(-3/2, 1/2)
abline(0, 1, lty = 2)

set.seed(4)
data4 <- rnorm(50)
seq.norm <- seq(1, 99, 2) / 100    # 분위수를 구하기 위한 시퀀스
qnorm.data <- qnorm(seq.norm)  # 표준 정규분포의 분위수 계산
par(mfrow = c(2,1))
qqnorm(data4, main = "Q-Q Norm")
used.qqnorm <- .Last.value    # qqnorm이 사용한 데이터 추출
abline(0, 1)
qqline(data4, lty = 2)
plot(qnorm.data, sort(data4), main = "using seq")
abline(0, 1)
qqline(data4, lty = 2)

sort(data4)        # data4를 정렬한 순서통계량
##  [1] -1.79738202 -1.68804858 -1.48218912 -1.28124663 -0.92802811
##  [6] -0.86214614 -0.82099358 -0.75421121 -0.63754350 -0.54249257
## [11] -0.46589588 -0.40451983 -0.37565514 -0.28344457 -0.28294368
## [16] -0.22740542 -0.21314452 -0.10036844 -0.04513712 -0.04420400
## [21]  0.01571945  0.03435191  0.09884369  0.15346418  0.16516902
## [26]  0.16902677  0.18153538  0.21675486  0.38305734  0.56660450
## [31]  0.59289694  0.59598058  0.68927544  0.72390416  0.86113187
## [36]  0.89114465  0.90983915  0.93409617  1.05193258  1.16502684
## [41]  1.24018084  1.25588403  1.28825688  1.29251234  1.30762236
## [46]  1.34370863  1.54081498  1.63561800  1.77686321  1.89653987
sort(used.qqnorm$y)      # qqnorm이 사용한 y 좌표값
## NULL
qnorm.data        # 표준 정규분포의 분위수
##  [1] -2.32634787 -1.88079361 -1.64485363 -1.47579103 -1.34075503
##  [6] -1.22652812 -1.12639113 -1.03643339 -0.95416525 -0.87789630
## [11] -0.80642125 -0.73884685 -0.67448975 -0.61281299 -0.55338472
## [16] -0.49585035 -0.43991317 -0.38532047 -0.33185335 -0.27931903
## [21] -0.22754498 -0.17637416 -0.12566135 -0.07526986 -0.02506891
## [26]  0.02506891  0.07526986  0.12566135  0.17637416  0.22754498
## [31]  0.27931903  0.33185335  0.38532047  0.43991317  0.49585035
## [36]  0.55338472  0.61281299  0.67448975  0.73884685  0.80642125
## [41]  0.87789630  0.95416525  1.03643339  1.12639113  1.22652812
## [46]  1.34075503  1.47579103  1.64485363  1.88079361  2.32634787
sort(used.qqnorm$x)      # qqnorm이 사용한 x 좌표값
## NULL
# 3.2.10 sunflowerplot() 함수

x <- NULL
y <- NULL
for (i in 1:50) {
      x <- c(x, rep(ifelse(i%%10 == 0, 10, i %%10), i))
      y <- c(y, rep((i-1) %/% 10 + 1, i))
  }
# (1)
t(table(x, y))
##    x
## y    1  2  3  4  5  6  7  8  9 10
##   1  1  2  3  4  5  6  7  8  9 10
##   2 11 12 13 14 15 16 17 18 19 20
##   3 21 22 23 24 25 26 27 28 29 30
##   4 31 32 33 34 35 36 37 38 39 40
##   5 41 42 43 44 45 46 47 48 49 50
layout(matrix(c(1, 1, 2, 3), ncol = 2, byrow = T))
sunflowerplot(x, y, ylim = c(0.5, 5.2))
title(main = "sunflowerplot's petals")
text(x=ifelse(1:50 %% 10==0, 10, 1:50 %% 10), y=((1:50 - 1)%/%10+1) - 0.5,
       as.character(1:50))

plot(x, y, pch = 16)
title(main = "scatter plot by plot")

plot(jitter(x), jitter(y), pch = 16)
title(main = "scatter plot by plot using jitter")

par(mfrow=c(1,1))
set.seed(101)
x <- sample(1:5, 200, replace = T)
set.seed(102)
y <- sample(1:5, 200, replace = T)

sunflowerplot(x, y)

sunflowerplot(1:10, rep(1, 10), ylim = c(0.5, 5.5), number = 1:10,
                pch = 21, col = "blue", bg = "green", cex = 2)
text(5, 1.3, "number = 1:10, pch = 21, col = \"blue\", bg = \"green\", cex = 2", adj = 0.5)

sunflowerplot(1:10, rep(2, 10), add = T, number = 1:10,
                seg.col = "gold", size = 1/3, seg.lwd = 2.5)
text(5, 2.3, "add = T, number = 1:10, seg.col = \"gold\", size = 1/3, seg.lwd = 2.5", adj = 0.5)

sunflowerplot(1:10, rep(3, 10), add = T, number = 1:10, cex.fact = 0.5)
text(5, 3.3, "add = T, number = 1:10, cex.fact = 0.5", adj = 0.5)

sunflowerplot(1:10, rep(4, 10), add = T, number = 1:10, cex.fact = 2.0)
text(5, 4.3, "add = T, number = 1:10, cex.fact = 2.0", adj = 0.5)

sunflowerplot(1:10, rep(5, 10), add = T, number = 1:10, rotate = TRUE)
text(5, 5.3, "add = T, number = 1:10, rotate = TRUE", adj = 0.5)

# 3.2.11 symbols() 함수 : 다차원의 자료를 2차원 그림으로 표현
x <- round(rnorm(20), 2)
y <- round(rnorm(20), 2)
z1 <- abs(round(rnorm(20), 2))
z2 <- abs(round(rnorm(20), 2))
z3 <- round(runif(20), 2)
z4 <- round(runif(20), 2)
z5 <- round(runif(20), 2)

symbols(x, y, circles = abs(x), inches = 0.2, bg = 1:20)
title(main = "symbols are circles")

symbols(x, y, squares = abs(x), inches = 0.2, bg = 1:20)
title(main = "symbols are squares")

symbols(x, y, rectangles = cbind(abs(x), abs(y)), inches = 0.2, bg = 1:20)
title(main = "symbols are rectangles")

symbols(x, y, stars = cbind(abs(x), abs(y), z1, z2, z3), inches = 0.2, bg=1:20)
title(main = "symbols are stars")

symbols(x, y, thermometers = cbind(abs(x), abs(y), z4), inches=0.2, bg=1:20)
title(main = "symbols are thermometers")

symbols(x, y, boxplots = cbind(abs(x), abs(y), z3, z4, z5), inches=0.2, bg=1:20)
title(main = "symbols are boxplots")

N <- nrow(trees)
palette(rainbow(N, end = 0.9))
with(trees, {
    symbols(Height, Volume, circles = Girth/16, inches = FALSE, bg = 1:N,
            fg = "gray30", main = "symbols(*, circles = Girth/16, inches = FALSE)")
    symbols(Height, Volume, circles = Girth/16, inches = TRUE, bg = 1:N,
            fg = "gray30", main = "symbols(*, circles = Girth/16,inches = TRUE)")
    symbols(Height, Volume, circles = Girth/16, inches = 0.1, bg = 1:N,
            fg = "gray30", main = "symbols(*, circles = Girth/16, inches = 0.1)")
})

palette("default")


# 3.2.12 assocplot() 함수 : 서로 다른 2개의 범주형 자료의 시각화

 HairEyeColor
## , , Sex = Male
## 
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    32   11    10     3
##   Brown    53   50    25    15
##   Red      10   10     7     7
##   Blond     3   30     5     8
## 
## , , Sex = Female
## 
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    36    9     5     2
##   Brown    66   34    29    14
##   Red      16    7     7     7
##   Blond     4   64     5     8
x <- margin.table(HairEyeColor, c(1, 2))
x
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    68   20    15     5
##   Brown   119   84    54    29
##   Red      26   17    14    14
##   Blond     7   94    10    16
assocplot(x, main = "Relation between hair and eye color")

chisq.test(x)
## 
##  Pearson's Chi-squared test
## 
## data:  x
## X-squared = 138.29, df = 9, p-value < 2.2e-16
# 3.2.13 fourfolodplot() 함수

admis <- aperm(UCBAdmissions, c(2, 1, 3))
dimnames(admis)[[2]] <- c("Yes", "No")
names(dimnames(admis)) <- c("Sex", "Admit?", "Department")
admis
## , , Department = A
## 
##         Admit?
## Sex      Yes  No
##   Male   512 313
##   Female  89  19
## 
## , , Department = B
## 
##         Admit?
## Sex      Yes  No
##   Male   353 207
##   Female  17   8
## 
## , , Department = C
## 
##         Admit?
## Sex      Yes  No
##   Male   120 205
##   Female 202 391
## 
## , , Department = D
## 
##         Admit?
## Sex      Yes  No
##   Male   138 279
##   Female 131 244
## 
## , , Department = E
## 
##         Admit?
## Sex      Yes  No
##   Male    53 138
##   Female  94 299
## 
## , , Department = F
## 
##         Admit?
## Sex      Yes  No
##   Male    22 351
##   Female  24 317
ftable(admis) # 전공, 성별, 합격유무
##               Department   A   B   C   D   E   F
## Sex    Admit?                                   
## Male   Yes               512 353 120 138  53  22
##        No                313 207 205 279 138 351
## Female Yes                89  17 202 131  94  24
##        No                 19   8 391 244 299 317
# (1) 성별 합격여부의 데이터
admis.sex <- margin.table(admis, c(1, 2))
admis.sex
##         Admit?
## Sex       Yes   No
##   Male   1198 1493
##   Female  557 1278
# (2) 성별 합격/불합격 비율
prop.table(admis.sex, 1)
##         Admit?
## Sex            Yes        No
##   Male   0.4451877 0.5548123
##   Female 0.3035422 0.6964578
# (3)
fourfoldplot(admis.sex)

# (4)
fourfoldplot(admis)

# (5)
fourfoldplot(admis, margin = 2)

prop.table(admis[,,1], 1)
##         Admit?
## Sex            Yes        No
##   Male   0.6206061 0.3793939
##   Female 0.8240741 0.1759259
prop.table(admis[,,2], 1)
##         Admit?
## Sex            Yes        No
##   Male   0.6303571 0.3696429
##   Female 0.6800000 0.3200000
prop.table(admis[,,3], 1)
##         Admit?
## Sex            Yes        No
##   Male   0.3692308 0.6307692
##   Female 0.3406408 0.6593592
prop.table(admis[,,4], 1)
##         Admit?
## Sex            Yes        No
##   Male   0.3309353 0.6690647
##   Female 0.3493333 0.6506667
prop.table(admis[,,5], 1)
##         Admit?
## Sex            Yes        No
##   Male   0.2774869 0.7225131
##   Female 0.2391858 0.7608142
prop.table(admis[,,6], 1)
##         Admit?
## Sex             Yes         No
##   Male   0.05898123 0.94101877
##   Female 0.07038123 0.92961877
# 학부별 합격률
round(prop.table(margin.table(admis, c(2, 3)), 2) * 100, 2)
##       Department
## Admit?     A     B     C     D     E     F
##    Yes 64.42 63.25 35.08 33.96 25.17  6.44
##    No  35.58 36.75 64.92 66.04 74.83 93.56
# 3.2.14 mosaiocplot() 함수

mosaicplot(admis.sex, color = FALSE, las = 0, main = "color = FALSE, las = 0")

mosaicplot(admis.sex, color = TRUE, las = 1, dir = c("h", "v"),
           xlab = "Admit?", ylab = "Sex",
           main = "color = T, las = 1,dir = c(\"h\", \"v\"), xlab, ylab")

mosaicplot(~ Gender + Admit, data = UCBAdmissions, sort = c(2, 1),
           color = 2:3, las = 2,
           main = "formula, sort = c(2, 1), color = 2:3, las = 2")

mosaicplot(admis.sex, off = c(5, 20), las = 3, shade = TRUE,
            main = "off = c(5, 20), las = 3, shade = TRUE")

mosaicplot(admis, sort = c(3, 1, 2), shade = T, margin = list(c(1, 3), c(2, 3)),
    xlab = "Department", main = "Sex, Admit?, Department Mosaic Plots")

# 3.2.15 paris() 함수

pairs(~ Fertility + Education + Catholic, data = swiss,
    subset = Education < 20, main = "Swiss data, Education < 20",
    col = 1 + (swiss$Agriculture > 50), cex = 1.2,
    pch = 1 + (swiss$Agriculture > 50))

pairs(iris[1:4], main = "Anderson's Iris Data--3 species",
    pch = 21, bg = c("red", "green3", "blue")[unclass(iris$Species)])

# 대각 패널 함수의 정의
panel.hist <- function(x, ...)
{
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- hist(x, plot = FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}

pairs(USJudgeRatings[1:3], panel = panel.smooth,
    cex = 1.5, pch = 24, bg = "light blue",
    diag.panel = panel.hist, cex.labels = 2, font.labels = 2)

panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor)
{
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- abs(cor(x, y))
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste(prefix, txt, sep = "")

    if(missing(cex.cor)) cex <- 0.8 / strwidth(txt)
    text(0.5, 0.5, txt, cex = cex * r)
}

pairs(USJudgeRatings[1:3], row1attop = FALSE, gap = 2,
    lower.panel = panel.smooth, upper.panel = panel.cor )

# 3.2.16 coplot() 함수

length(quakes$depth)
## [1] 1000
summary(quakes$depth)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    40.0    99.0   247.0   311.4   543.0   680.0
inter <- co.intervals(quakes$depth, number = 4, overlap = 0.1)
inter
##       [,1]  [,2]
## [1,]  39.5 107.5
## [2,]  96.5 260.5
## [3,] 238.5 544.5
## [4,] 534.5 680.5
inter[, 2] - inter[, 1]      # 등 간격이 아님
## [1]  68 164 306 146
length.inter <- as.numeric(0)
for (i in 1:4)
    length.inter[i] <- length(quakes$depth[inter[i, 1] <= quakes$depth &
                              quakes$depth <= inter[i, 2]])
length.inter                # 도수의 분포가 균일하게 나눔
## [1] 272 272 271 270
sum(length.inter)            # 도수의 합이 1000을 넘음
## [1] 1085
dim(quakes)
## [1] 1000    5
is.data.frame(quakes)
## [1] TRUE
names(quakes)
## [1] "lat"      "long"     "depth"    "mag"      "stations"
coplot(lat ~ long | depth, data = quakes)

dim(iris)
## [1] 150   5
is.data.frame(iris)
## [1] TRUE
names(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width" 
## [5] "Species"
table(iris$Species)
## 
##     setosa versicolor  virginica 
##         50         50         50
coplot(Sepal.Length ~ Sepal.Width | Species, data = iris)

formulas <- lat ~ long | depth * mag
coplot(formulas, data = quakes, col = "red", pch = 2,
         number = c(3, 4), bar.bg = c(num = "green", fac = "blue"))

# 3.2.17 satrs() 함수

op <- par(no.readonly = TRUE)
WESTarrests <- USArrests[state.region == "West",]                           # (1)
par(mfrow = c(2, 2))
stars(WESTarrests, draw.segments = FALSE, len = 0.7, key.loc = c(7, 2))     # (2)
title(main = "Stars Plot, unit key, len = 0.7")
stars(WESTarrests, draw.segments = TRUE, full = FALSE, key.loc = c(7, 2))   # (3)
title(main = "Segments Plot,unit key, full = F")
stars(WESTarrests, locations = c(0, 0), scale = TRUE, radius = FALSE,
        col.stars = 0, key.loc = c(0, 0))                                     # (4)
title(main = "Spider Plot,unit key, scale = T, radius = F")
stars(WESTarrests, locations = 0:1, scale = TRUE, draw.segments = TRUE,
        col.segments = 0, col.stars = 0, key.loc = 0:1)                       # (5)
title(main = "Radar Plot,unit key, scale = T")

par(op)


# 3.2.18 persp() 함수 # 3차원 함수

# (1) sinc 함수를 정의함
x <- seq(-10, 10, length = 30)
y <- x
f <- function(x, y) { r <- sqrt(x ^ 2 + y ^ 2); 10 * sin(r) / r }
z <- outer(x, y, f)
z[is.na(z)] <- 1        # 결측치의 값을 1로 바꾼다.

# (2) sinc 함수를 투시도로 그림
persp(x, y, z, theta = 30, phi = 30, expand = 0.5, col = "lightblue",
        ltheta = 120, shade = 0.75, ticktype = "detailed",
        xlab = "X", ylab = "Y", zlab = "Sinc( r )") -> res
title(main="Perspective Plots with Sinc Function")
round(res, 3)      # persp() 함수의 반환 값
##       [,1]   [,2]   [,3]   [,4]
## [1,] 0.087 -0.025  0.043 -0.043
## [2,] 0.050  0.043 -0.075  0.075
## [3,] 0.000  0.074  0.042 -0.042
## [4,] 0.000 -0.273 -2.890  3.890
# (3) 3차원 좌표로 변환하는 함수
trans3d <- function(x, y, z, pmat) {
    tr <- cbind(x, y, z, 1) %*% pmat
    list(x = tr[,1] / tr[,4], y = tr[,2] / tr[,4])
}

xE <- c(-10,10); xy <- expand.grid(xE, xE)
points(trans3d(xy[,1], xy[,2], 6, pm = res), col = 2, pch = 16)
lines (trans3d(x, y =10, z = 6 + sin(x), pm = res), col = 3)

phi <- seq(0, 2 * pi, len = 201)
r1 <- 7.725
xr <- r1 * cos(phi)
yr <- r1 * sin(phi)
lines(trans3d(xr, yr, f(xr, yr), res), col = "pink", lwd = 2)

z <- 2 * volcano        # Exaggerate the relief
x <- 10 * (1:nrow(z))   # 10 meter spacing (S to N)
y <- 10 * (1:ncol(z))   # 10 meter spacing (E to W)

par(bg = "lavender")
persp(x, y, z, theta = 135, phi = 30, col = "green3", scale = FALSE,
        ltheta = -120, shade = 0.75, border = NA, box = FALSE)
title("Perspective Plots with volcano")

# 3.2.19 contour() 함수
rx <- range(x <- 10 * 1:nrow(volcano))
ry <- range(y <- 10 * 1:ncol(volcano))
ry <- ry + c(-1, 1) * (diff(rx) - diff(ry)) / 2
tcol <- terrain.colors(12)
plot(x = 0, y = 0, type = "n", xlim = rx, ylim = ry, xlab = "", ylab = "")
u <- par("usr")
rect(u[1], u[3], u[2], u[4], col = tcol[8], border = "red")
contour(x, y, volcano, col = tcol[2], lty = "solid", add = TRUE,
          vfont = c("sans serif", "plain"))
title("A Topographic Map of Maunga Whau", font = 4)
abline(h = 200 * 0:4, v = 200 * 0:4, col = "lightgray", lty = 2, lwd = 0.1)

line.list <- contourLines(x, y, volcano)  # (1) contourLines 호출
plot(x = 0, y = 0, type = "n", xlim = rx, ylim = ry, xlab = "", ylab = "")
rect(u[1], u[3], u[2], u[4], col = tcol[8], border = "red")
is.list(line.list)                        # (2) 리스트 여부 확인
## [1] TRUE
length(line.list)                          # (3) 성분의 개수
## [1] 20
names(line.list[[1]])                      # (4) 성분의 이름
## [1] "level" "x"     "y"
line.list[[1]]                            # (5) 첫 번째 성분 출력
## $level
## [1] 100
## 
## $x
##  [1] 870.0000 860.0000 850.9173 850.0000 840.0000 830.9173 830.0000
##  [8] 820.9173 820.0000 810.9173 810.0000 800.9173 800.0000 790.9173
## [15] 790.0000 780.9173 780.0000 770.4807 770.0000 760.3257 760.0000
## [22] 750.3257 750.0000 740.2463 740.0000 730.2463 730.0000 720.3257
## [29] 720.0000 710.4807 710.4807 710.0000 700.9173 700.0000 690.9173
## [36] 690.9173 690.9173 690.0000 680.9173 680.9173 680.9173 680.9173
## [43] 680.0000 670.9173 670.9173 670.9173 670.9173
## 
## $y
##  [1] 340.9173 340.9173 350.0000 350.9173 350.9173 360.0000 360.9173
##  [8] 370.0000 370.9173 380.0000 380.9173 390.0000 390.9173 400.0000
## [15] 400.9173 410.0000 410.4807 420.0000 420.3257 430.0000 430.3257
## [22] 440.0000 440.2463 450.0000 450.2463 460.0000 460.3257 470.0000
## [29] 470.4807 480.0000 490.0000 490.9173 500.0000 500.9173 510.0000
## [36] 520.0000 530.0000 530.9173 540.0000 550.0000 560.0000 570.0000
## [43] 570.9173 580.0000 590.0000 600.0000 610.0000
templines <- function(clines) {                # (6) 등고선과 라벨 출력 함수 정의
    lines(clines[[2]], clines[[3]])
    text(clines[[2]][1], clines[[3]][1], clines[[1]][1], cex = 0.5, col = "blue")
}
invisible(lapply(line.list, templines))        # (7) 등고선을 그린다.
title("A Topographic Map of Maunga Whau by contourLines", font=4)
abline(h = 200 * 0:4, v = 200*0:4, col = "lightgray", lty = 2, lwd = 0.1)

# 3.2.20 image() 함수

image(volcano, zlim = c(150, 200), xaxs = "r", yaxs = "r",
        xlab = "West to East", ylab = "South to North")
image(volcano, zlim = c(0, 150), add = T, col = cm.colors(12),
        xlab = "0 to 1", ylab = "0 to 1")
title(main = "image & add image")

x <- 10 * (1:nrow(volcano))
y <- 10 * (1:ncol(volcano))
image(x, y, volcano, col = terrain.colors(100), axes = FALSE)
contour(x, y, volcano, levels = seq(90, 200, by = 5),
          add = TRUE, col = "peru")
axis(1, at = seq(100, 800, by = 100))
axis(2, at = seq(100, 600, by = 100))
box()
title(main = "Maunga Whau Volcano", font.main = 4)

# 3.2.21 filled.contour 함수()

x <- 10 * 1:nrow(volcano)
y <- 10 * 1:ncol(volcano)
filled.contour(x, y, volcano, color = terrain.colors,
                 plot.title = title(main = "The Topography of Maunga Whau",
                                    xlab = "Meters North", ylab = "Meters West"),
                 plot.axes = { axis(1, seq(100, 800, by = 100))
                               axis(2, seq(100, 600, by = 100)) },
                 key.title = title(main = "Heightn(meters)"),
                 key.axes = axis(4, seq(90, 190, by = 10)))